Code
library(tidyverse)
library(rvest)
with whiskynotes.be data
Tony Duan
# Scrape every yearly archive page listed in `url_list` and collect:
#   - bottle_list / all_year_list_bottle: each line of text found in <p>
#     elements, together with the year of the page it came from;
#   - topic_list / topic_link_list / all_year_list_topic: the text of the
#     ".archive-link" elements, the href of the ".entry-permalink" elements,
#     and the year of the page they came from.
# Per-page results are stored in preallocated lists and flattened once after
# the loop, rather than growing five vectors with c() on every iteration.
n_pages <- length(url_list)
bottles_by_page <- vector("list", n_pages)
topics_by_page <- vector("list", n_pages)
links_by_page <- vector("list", n_pages)
topic_years_by_page <- vector("list", n_pages)
bottle_years_by_page <- vector("list", n_pages)

for (idx in seq_along(url_list)) {
  page_url <- url_list[[idx]]

  # The year is the last "/"-separated segment of the URL.
  year <- tail(unlist(strsplit(page_url, split = "/")), 1)
  print(year)
  print(page_url)

  year_page <- read_html(page_url)

  # Paragraph text can span several lines; split it into one entry per line.
  bottle_lines <- year_page %>%
    html_elements("p") %>%
    html_text2() %>%
    strsplit("\n") %>%
    unlist()

  topic_titles <- year_page %>%
    html_elements(".archive-link") %>%
    html_text2()

  topic_links <- year_page %>%
    html_elements(css = ".entry-permalink") %>%
    html_attr("href")

  # Topics and links are paired by position later on, so a count mismatch
  # would silently misalign the table (or fail later with an opaque error).
  if (length(topic_titles) != length(topic_links)) {
    stop(
      "Found ", length(topic_titles), " topics but ", length(topic_links),
      " links on ", page_url,
      call. = FALSE
    )
  }

  bottles_by_page[[idx]] <- bottle_lines
  topics_by_page[[idx]] <- topic_titles
  links_by_page[[idx]] <- topic_links
  topic_years_by_page[[idx]] <- rep(year, length(topic_titles))
  bottle_years_by_page[[idx]] <- rep(year, length(bottle_lines))

  # Pause one second between requests.
  Sys.sleep(1)
}

bottle_list <- unlist(bottles_by_page, use.names = FALSE)
topic_list <- unlist(topics_by_page, use.names = FALSE)
topic_link_list <- unlist(links_by_page, use.names = FALSE)
all_year_list_topic <- unlist(topic_years_by_page, use.names = FALSE)
all_year_list_bottle <- unlist(bottle_years_by_page, use.names = FALSE)
---
title: "All year page"
subtitle: "with whiskynotes.be data"
author: "Tony Duan"
execute:
  warning: false
  error: false
  eval: false
format:
  html:
    toc: true
    toc-location: right
    code-fold: show
    code-tools: true
    number-sections: true
    code-block-bg: true
    code-block-border-left: "#31BAE9"
---
```{r}
library(tidyverse)
library(rvest)
```
```{r}
# Show the installed version of the rvest package.
packageVersion("rvest")
```
# loop all year page
```{r}
# Years whose archive pages will be scraped (2010 through 2024, inclusive).
year_list <- 2010:2024
print(year_list)
```
```{r}
# One archive URL per year: the site root followed by the year.
url_list <- paste0("https://www.whiskynotes.be/", year_list)
print(url_list)
```
```{r}
# Scrape every yearly archive page listed in `url_list` and collect:
#   - bottle_list / all_year_list_bottle: each line of text found in <p>
#     elements, together with the year of the page it came from;
#   - topic_list / topic_link_list / all_year_list_topic: the text of the
#     ".archive-link" elements, the href of the ".entry-permalink" elements,
#     and the year of the page they came from.
# Per-page results are stored in preallocated lists and flattened once after
# the loop, rather than growing five vectors with c() on every iteration.
n_pages <- length(url_list)
bottles_by_page <- vector("list", n_pages)
topics_by_page <- vector("list", n_pages)
links_by_page <- vector("list", n_pages)
topic_years_by_page <- vector("list", n_pages)
bottle_years_by_page <- vector("list", n_pages)

for (idx in seq_along(url_list)) {
  page_url <- url_list[[idx]]

  # The year is the last "/"-separated segment of the URL.
  year <- tail(unlist(strsplit(page_url, split = "/")), 1)
  print(year)
  print(page_url)

  year_page <- read_html(page_url)

  # Paragraph text can span several lines; split it into one entry per line.
  bottle_lines <- year_page %>%
    html_elements("p") %>%
    html_text2() %>%
    strsplit("\n") %>%
    unlist()

  topic_titles <- year_page %>%
    html_elements(".archive-link") %>%
    html_text2()

  topic_links <- year_page %>%
    html_elements(css = ".entry-permalink") %>%
    html_attr("href")

  # Topics and links are paired by position later on, so a count mismatch
  # would silently misalign the table (or fail later with an opaque error).
  if (length(topic_titles) != length(topic_links)) {
    stop(
      "Found ", length(topic_titles), " topics but ", length(topic_links),
      " links on ", page_url,
      call. = FALSE
    )
  }

  bottles_by_page[[idx]] <- bottle_lines
  topics_by_page[[idx]] <- topic_titles
  links_by_page[[idx]] <- topic_links
  topic_years_by_page[[idx]] <- rep(year, length(topic_titles))
  bottle_years_by_page[[idx]] <- rep(year, length(bottle_lines))

  # Pause one second between requests.
  Sys.sleep(1)
}

bottle_list <- unlist(bottles_by_page, use.names = FALSE)
topic_list <- unlist(topics_by_page, use.names = FALSE)
topic_link_list <- unlist(links_by_page, use.names = FALSE)
all_year_list_topic <- unlist(topic_years_by_page, use.names = FALSE)
all_year_list_bottle <- unlist(bottle_years_by_page, use.names = FALSE)
```
# combine
```{r}
# Combine the topic texts, their links, and the matching years into one table.
data <- tibble(
  topic_list = topic_list,
  topic_link_list = topic_link_list,
  all_year_list_topic = all_year_list_topic
)
```
```{r}
# Combine the scraped paragraph lines and the matching years into one table.
bottle003 <- tibble(
  bottle_list = bottle_list,
  all_year_list_bottle = all_year_list_bottle
)
```
# output
```{r}
library(openxlsx)

# Write both tables to a single workbook, one sheet per table
# ("topic" and "bottle").
output_file <- "./output/all year page.xlsx"

# Create the output directory if it does not exist yet; otherwise the
# workbook cannot be written on a fresh checkout.
dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE)

list_of_datasets <- list("topic" = data, "bottle" = bottle003)
write.xlsx(list_of_datasets, file = output_file)
```
# reference: